package cz.semjobKB.extract.api.impl;

import java.io.File;
import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.springframework.stereotype.Component;

import cz.semjobKB.extract.api.IExtractText;

/**
 * {@link IExtractText} implementation that reads the textual content of a PDF
 * file using PDFBox's {@link PDFTextStripper}.
 */
@Component
public class ExtractFromPdf implements IExtractText {

	private static final Logger logger = Logger.getLogger(ExtractFromPdf.class);

	/**
	 * Extracts the text of the given PDF file.
	 *
	 * @param file the PDF file to read
	 * @return the text produced by {@link PDFTextStripper#getText(PDDocument)}
	 * @throws IOException if the stripper cannot be created, the document cannot
	 *                     be loaded, or extracting text / closing the document fails
	 */
	public String extractText(File file) throws IOException {

		logger.info("Extracting text from PDF document: " + file.getAbsolutePath());

		PDFTextStripper stripper = new PDFTextStripper();
		PDDocument document = PDDocument.load(file);
		String text;
		try {
			text = stripper.getText(document);
		} finally {
			// Always release the loaded document, even when extraction fails;
			// otherwise every call leaks the resources held by the PDDocument.
			document.close();
		}

		logger.info("Text has been extracted from PDF document.");

		return text;

	}

}
